package com.xliang.spider.version02;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Iterator;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import com.xliang.db.DBUtil;
import com.xliang.spider.util.CookiesUtil;

/**
 * Thread that scans a course index page for video links.
 *
 * <p>The index page is fetched with Jsoup; every {@code h2 > a} link found inside an
 * element of class {@code cf} is treated as a course page. Each course page is then
 * fetched, and every {@code h2 > a} link inside its first {@code video-list} element is
 * treated as a video page whose first {@code <source>} tag carries the video address.
 *
 * <p>The JDBC connection is held in a static field and is therefore shared by all
 * instances of this class.
 */
public class SpiderScanVideo2 extends Thread
{
    
    /** Timeout, in milliseconds, applied to every Jsoup request. */
    private static final int TIMEOUT_MILLIS = 10000;
    
    /** Lazily opened connection shared by all instances; access it through {@link #connection()}. */
    private static Connection conn = null;
    
    /** Address of the index page scanned by {@link #run()}. */
    private final String url;
    
    /**
     * Creates a scanner for the given index page. The scan does not begin until the
     * thread is started.
     *
     * @param url address of the index page to scan
     */
    public SpiderScanVideo2(String url)
    {
        this.url = url;
    }
    
    /** Scans the index page passed to the constructor. */
    @Override
    public void run()
    {
        scanVideo(url);
    }
    
    /**
     * Scans the index page at {@code url} and prints the address of every video found.
     *
     * <p>Failures are printed and never propagated: a failure on the index page ends the
     * scan, a failure on a course page skips that course.
     *
     * @param url address of the index page to scan
     */
    public void scanVideo(String url)
    {
        // Running counter used as the numeric prefix of the generated file names.
        int index = 1;
        try
        {
            Document rootDocument = Jsoup.connect(url).timeout(TIMEOUT_MILLIS).get();
            // Iterate with for-each so each element is taken exactly once. The previous
            // code called next() twice per pass, which skipped every other element and
            // threw NoSuchElementException when the number of elements was odd.
            for (Element root : rootDocument.getElementsByClass("cf"))
            {
                System.out.println(root);
                for (Element parent : root.select("h2 > a"))
                {
                    String parentHref = parent.attr("href");
                    String parentTitle = sanitizeTitle(parent.text());
                    System.out.println(String.format("正在扫描%s的所有视频", parentTitle));
                    
                    try
                    {
                        Document childDocument = Jsoup.connect(parentHref).timeout(TIMEOUT_MILLIS).get();
                        Element videoList = childDocument.getElementsByClass("video-list").first();
                        if (videoList == null)
                        {
                            // No video list on this page: skip the course, but still
                            // consume one index as the failure path below does.
                            System.out.println(String.format("未找到%s的视频列表，跳过", parentTitle));
                            index++;
                            continue;
                        }
                        for (Element child : videoList.select("h2 > a"))
                        {
                            String href = child.attr("href");
                            String title = sanitizeTitle(child.text());
                            String videoName = String.format("%03d.%s-%s.mp4", index, parentTitle, title);
                            String videoUrl = resolveVideoUrl(href, videoName);
                            // Persisting the result is currently disabled; re-enable one of:
                            // addVideo(videoName, videoUrl);
                            // repeatAdd(videoName, videoUrl);
                            index++;
                        }
                    }
                    catch (Exception e)
                    {
                        index++;
                        e.printStackTrace();
                    }
                }
            }
            // The former call to the static Thread.interrupted() was removed: it only
            // clears the current thread's interrupt flag and does not stop anything.
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        
    }
    
    /**
     * Strips the punctuation that the scanner removes from titles and replaces
     * {@code /} with {@code 、}.
     *
     * @param raw title text as read from the page
     * @return the cleaned title
     */
    private static String sanitizeTitle(String raw)
    {
        // Plain replace() is used because every pattern is a literal, not a regex.
        return raw.replace("：", "")
            .replace(":", "")
            .replace("“", "")
            .replace("”", "")
            .replace("’", "")
            .replace("。", "")
            .replace("\"", "")
            .replace("/", "、");
    }
    
    /**
     * Determines the address to record for one video link.
     *
     * <p>A {@code javascript} link is returned unchanged. Otherwise the linked page is
     * fetched and the {@code src} of its first {@code <source>} element is returned. If
     * the page cannot be fetched, has no {@code <source>} element, or that {@code src}
     * is itself a {@code javascript} link, {@code href} is returned instead.
     *
     * @param href link taken from the course page
     * @param videoName generated file name, used only in the printed messages
     * @return the video address, or {@code href} as a fallback
     */
    private String resolveVideoUrl(String href, String videoName)
    {
        if (href.startsWith("javascript"))
        {
            return href;
        }
        try
        {
            Document document =
                Jsoup.connect(href).timeout(TIMEOUT_MILLIS).cookies(CookiesUtil.createJKCookies()).get();
            Element video = document.getElementsByTag("source").first();
            if (video == null)
            {
                // Explicit check instead of relying on a caught NullPointerException.
                System.out.println(String.format("扫描到视频：%s的地址%s失败", videoName, href));
                return href;
            }
            String videoUrl = video.attr("src");
            System.out.println(String.format("扫描到视频：%s的地址为：%s", videoName, videoUrl));
            return videoUrl.startsWith("javascript") ? href : videoUrl;
        }
        catch (Exception e)
        {
            System.out.println(String.format("扫描到视频：%s的地址%s失败", videoName, href));
            e.printStackTrace();
            return href;
        }
    }
    
    /**
     * Returns the shared connection, opening it on first use.
     *
     * <p>Synchronized so that two scanner threads cannot both see {@code null} and each
     * open a connection.
     *
     * @return the shared connection
     * @throws Exception if {@code DBUtil.open()} fails
     */
    private static synchronized Connection connection()
        throws Exception
    {
        if (conn == null)
        {
            conn = DBUtil.open();
        }
        return conn;
    }
    
    /**
     * Inserts one row into the {@code video} table.
     *
     * @param videoName value for the {@code video_name} column
     * @param videoUrl value for the {@code video_url} column
     * @return the row count reported by the insert, or {@code 0} if it failed (the
     *         failure is printed, not thrown)
     */
    public int addVideo(String videoName, String videoUrl)
    {
        int affectedRows = 0;
        PreparedStatement pst = null;
        try
        {
            String sql = "insert into video(video_name,video_url) values(?,?)";
            pst = connection().prepareStatement(sql, Statement.RETURN_GENERATED_KEYS);
            pst.setString(1, videoName);
            pst.setString(2, videoUrl);
            affectedRows = pst.executeUpdate();
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            DBUtil.close(pst);
        }
        return affectedRows;
    }
    
    /**
     * Updates the stored address of a video that already has a row in the
     * {@code video} table.
     *
     * <p>If a row with a positive {@code id} exists for {@code videoName}, its
     * {@code video_url} is replaced and {@code is_download} is reset to {@code 0}. In
     * every other case only a "skipped" message is printed. Failures are printed, not
     * thrown.
     *
     * <p>NOTE(review): the skip message says the video is already downloaded, yet it is
     * also printed when no row exists at all. Confirm whether that case should insert
     * the video instead.
     *
     * @param videoName name used to look the video up
     * @param videoUrl new address to store
     */
    public void repeatAdd(String videoName, String videoUrl)
    {
        PreparedStatement pst = null;
        try
        {
            int id = findVideoId(videoName);
            if (id > 0)
            {
                String updateSql = "update video set is_download=0,video_url=? where id=?";
                pst = connection().prepareStatement(updateSql);
                pst.setString(1, videoUrl);
                pst.setInt(2, id);
                pst.executeUpdate();
                System.out.println(String.format("成功更新视频%s的地址为%s", videoName, videoUrl));
            }
            else
            {
                System.out.println(String.format("视频%s已经下载，跳过。。。", videoName));
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            DBUtil.close(pst);
        }
    }
    
    /**
     * Looks up the {@code id} of the first row whose {@code video_name} matches.
     *
     * @param videoName name to search for
     * @return the row's {@code id}, or {@code 0} when no row matches
     * @throws Exception if the connection cannot be opened or the query fails
     */
    private int findVideoId(String videoName)
        throws Exception
    {
        PreparedStatement pst = null;
        try
        {
            pst = connection().prepareStatement("select * from video where video_name = ?");
            pst.setString(1, videoName);
            ResultSet rs = pst.executeQuery();
            try
            {
                return rs.next() ? rs.getInt("id") : 0;
            }
            finally
            {
                // Closed on every path, including the no-row and error paths.
                rs.close();
            }
        }
        finally
        {
            DBUtil.close(pst);
        }
    }
    
    /**
     * Starts a scan of the Android course index.
     *
     * @param args unused
     * @throws Exception declared for compatibility; nothing in this method throws a
     *         checked exception
     */
    public static void main(String[] args)
        throws Exception
    {
        // The thread must be started; constructing it alone performs no work.
        new SpiderScanVideo2("http://ke.jikexueyuan.com/zhiye/android/").start();
    }
}
